The objective of this project is to classify a given silhouette as one of three types of vehicle, using a set of features extracted from the silhouette. The vehicle may be viewed from one of many different angles.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# Load the vehicle-silhouette dataset and take a first look at it.
dataset = pd.read_csv("vehicle.csv")
dataset.head()
dataset.info()
dataset.isnull().sum()
dataset.describe().T
sns.pairplot(dataset)
plt.subplots(figsize=(10, 10))
# Restrict the correlation matrix to numeric columns: the 'class' column is a
# string, and DataFrame.corr() raises on non-numeric columns in pandas >= 2.0.
sns.heatmap(dataset.select_dtypes(include=[np.number]).corr(), annot=True)
plt.show()
# Pass x=/y= explicitly: seaborn >= 0.12 rejects extra positional data vectors.
sns.scatterplot(x=dataset["compactness"], y=dataset["circularity"], hue=dataset["class"])
From the above figure, I suspect the following:
# Class-wise spread of circularity and compactness.
sns.boxplot(x=dataset["class"], y=dataset["circularity"])
sns.boxplot(x=dataset["class"], y=dataset["compactness"])
Outliers exist in circularity values for class "bus"
It is difficult to differentiate van and bus by checking the circularity, but cars have high circularity values. Since there are outliers in circularity for the "bus" class, there is a chance of misclassifying "bus" as "car". So we need to handle outliers prior to training.
# Keyword arguments: seaborn >= 0.12 rejects extra positional data vectors.
sns.scatterplot(x=dataset["compactness"], y=dataset["distance_circularity"], hue=dataset["class"])
Same observation as that of Compactness and Circularity
# 3x3 grid of scatter plots; keyword args are required by seaborn >= 0.12
# (the original passed the data vectors positionally).
f, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
# (x, y) feature pairs, laid out row-major on the grid.
pairs = [
    ("compactness", "scatter_ratio"),
    ("compactness", "elongatedness"),
    ("compactness", "pr.axis_rectangularity"),
    ("compactness", "max.length_rectangularity"),
    ("compactness", "scaled_variance"),
    ("compactness", "scaled_radius_of_gyration"),
    ("compactness", "scaled_variance.1"),
    ("circularity", "distance_circularity"),
    ("circularity", "scatter_ratio"),
]
for (x_col, y_col), ax in zip(pairs, axes.flat):
    sns.scatterplot(x=dataset[x_col], y=dataset[y_col], hue=dataset["class"], ax=ax)
Higher values of scatter_ratio for cars
# 3x3 scatter grid centred on circularity; keyword args are required by
# seaborn >= 0.12 (the original passed the data vectors positionally).
f, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
pairs = [
    ("circularity", "scatter_ratio"),
    ("circularity", "elongatedness"),
    ("circularity", "pr.axis_rectangularity"),
    ("circularity", "max.length_rectangularity"),
    ("circularity", "scaled_variance"),
    ("circularity", "scaled_radius_of_gyration"),
    ("circularity", "scaled_variance.1"),
    ("radius_ratio", "max.length_rectangularity"),
    ("elongatedness", "scatter_ratio"),
]
for (x_col, y_col), ax in zip(pairs, axes.flat):
    sns.scatterplot(x=dataset[x_col], y=dataset[y_col], hue=dataset["class"], ax=ax)
# 3x3 grid of class-wise box plots for the first batch of features.
f, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
box_features = ["compactness", "elongatedness", "pr.axis_rectangularity",
                "max.length_rectangularity", "scaled_variance",
                "scaled_radius_of_gyration", "scaled_variance.1",
                "distance_circularity", "scatter_ratio"]
for col, ax in zip(box_features, axes.flat):
    sns.boxplot(y=dataset[col], x=dataset["class"], ax=ax)
Most of the features for "bus" contain extreme values, so scaling the dataset will help to handle the outliers to some extent.
# Class-wise box plots for the remaining features (last grid cell stays empty).
f, axes = plt.subplots(nrows=3, ncols=3, figsize=(15, 15))
remaining = ["circularity", "distance_circularity", "pr.axis_aspect_ratio",
             "max.length_aspect_ratio", "scaled_radius_of_gyration.1",
             "skewness_about.1", "skewness_about.2", "hollows_ratio"]
for col, ax in zip(remaining, axes.flat):
    sns.boxplot(y=dataset[col], x=dataset["class"], ax=ax)
Hollows ratio for "bus" is having lower values compared to"van" and "car"
max_length_aspect ratio is an important feature for identifying bus from other two classes.
distance_circularity is higher for car.
Lower values of scaled_variance point to the class "van".
Elongatedness is higher for "van".
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# module for splitting the entire dataset into training and testing dataset
from sklearn.model_selection import train_test_split
#import LogisticReGression Model
from sklearn.linear_model import LogisticRegression
#import NaiveBayes model
from sklearn.naive_bayes import GaussianNB
#import KNN model
from sklearn.neighbors import KNeighborsClassifier
#import SVM model
from sklearn.svm import SVC
#Import metrics module to calculate model performance
from sklearn import metrics
#import resample module for upsampling
from sklearn.utils import resample
# module for evaluating the feature dependecies with target variable
import statsmodels.formula.api as sm
# module for cross validation
from sklearn.model_selection import cross_val_score
# module for AUC-ROC Calculation
from sklearn.metrics import roc_curve, auc
# module for SVM hyperparameter tuning
from sklearn.model_selection import GridSearchCV,RandomizedSearchCV
# module for ensembled models
from sklearn.ensemble import RandomForestClassifier, BaggingClassifier, AdaBoostClassifier, VotingClassifier,GradientBoostingClassifier,ExtraTreesClassifier
# module for decision tree
from sklearn.tree import DecisionTreeClassifier
#
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
import lightgbm as lgb
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import random
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
class tuning_params:
    """Plain holder for the hyperparameter search grids.

    All three grids start as None and are filled in later by
    initalize_tuning_params().
    """
    def __init__(self):
        for grid_name in ("random_forest", "adaboost", "svm"):
            setattr(self, grid_name, None)
class feature_selection:
    """Accumulate per-model feature-importance scores into a running average.

    Fixes over the original:
    * `features.length` does not exist (AttributeError) -> `len(features)`.
    * both methods were missing the `self` parameter, so they could never be
      called on an instance.
    * the averaging `apply` was missing `axis=1`, so the lambda received
      columns instead of rows.
    """

    def __init__(self, features):
        # One row per feature; NetImportance starts at zero and is averaged
        # with each model's importance as it is added.
        self.feature_importance_metrics = pd.DataFrame()
        self.feature_importance_metrics["features"] = features
        self.feature_importance_metrics["NetImportance"] = np.zeros(len(features))

    def add_to_metrics(self, feature_importance, model_name):
        """Merge a model's importance frame (columns: features, <model_name>)
        and fold it into the running NetImportance average."""
        self.feature_importance_metrics = pd.merge(
            self.feature_importance_metrics, feature_importance, on=["features"])
        self.feature_importance_metrics["NetImportance"] = (
            self.feature_importance_metrics.apply(
                lambda row: (row["NetImportance"] + row[model_name]) / 2, axis=1))

    def get_features(self, threshold):
        """Return the features whose NetImportance exceeds *threshold*."""
        mask = self.feature_importance_metrics["NetImportance"] > threshold
        return self.feature_importance_metrics[mask]["features"]
# Shared container for the hyperparameter grids; filled in by
# initalize_tuning_params() and read by the do_hyperpar*_tuning helpers.
hyperparams = tuning_params()
def drop_columns(df, col_names, inplace=False):
    """Drop *col_names* from *df* and return the resulting frame.

    When inplace is False (default) a new frame is returned and df is left
    untouched.  When inplace is True, df is modified and returned as well —
    the original returned None in that case (df.drop(..., inplace=True)
    returns None), which was almost certainly unintended.
    """
    print("--------Dropping columns-----")
    if inplace:
        df.drop(col_names, inplace=True, axis=1)
        return df
    return df.drop(col_names, axis=1)
def transform_data(df):
    """Derive engineered columns on a loan-marketing style frame, in place.

    Adds:
      CCUsage           - CCAvg bucketed: <3 -> 1, [3,6) -> 2, [6,8) -> 3, >=8 -> 4
      IncomeStatus      - Income bucketed: <110 -> 1, [110,180) -> 2, >=180 -> 3
      HasMortgage       - 1 when Mortgage is non-zero, else 0
      Age_Scaled        - z-score of Age
      Experience_Scaled - z-score of Experience (bug fix: the original used
                          x.Age with Experience's mean/std)

    Returns the mutated df (columns are added in place, as before).
    """
    # Vectorised bucketing instead of row-wise apply; right=False makes the
    # bins left-closed, matching the original's </>= comparisons exactly.
    df["CCUsage"] = pd.cut(df["CCAvg"], bins=[-np.inf, 3, 6, 8, np.inf],
                           labels=[1, 2, 3, 4], right=False).astype(int)
    df["IncomeStatus"] = pd.cut(df["Income"], bins=[-np.inf, 110, 180, np.inf],
                                labels=[1, 2, 3], right=False).astype(int)
    df["HasMortgage"] = (df["Mortgage"] != 0).astype(int)
    # z-scores with pandas' sample (ddof=1) std, as before.
    df["Age_Scaled"] = (df["Age"] - df["Age"].mean()) / df["Age"].std()
    df["Experience_Scaled"] = (df["Experience"] - df["Experience"].mean()) / df["Experience"].std()
    return df
def get_features_and_target(df, target_column_name):
    """Split *df* into a feature frame (everything but the target) and the
    target Series; returns (X, y)."""
    target = df[target_column_name]
    features = df.drop([target_column_name], axis=1)
    return features, target
def get_onehot_encoded_features(X):
    """One-hot encode every object-dtype column of *X* and return a NEW frame
    with the original categorical columns replaced by dummy columns.

    Bug fix: the original dropped the categorical columns from the caller's
    frame in place (drop(..., inplace=True)); this version works on a copy
    and leaves the argument untouched.
    """
    X = X.copy()
    category_cols = list(X.select_dtypes(include=['object']).columns)
    dummies = pd.get_dummies(X[category_cols], prefix=category_cols)
    X = X.drop(category_cols, axis=1)
    return pd.concat([X, dummies], axis=1)
def handle_missing_values(df, mode):
    """Fill NaNs in *df* using the named strategy; returns a new frame.

    mode is one of "median", "mode", "mean"; any other value returns None
    (matching the original dict.get behaviour).

    Fixes over the original:
    * the "mode" strategy filled with the MEDIAN, not the mode.
    * the dict eagerly computed all three fills on every call; the handlers
      are now lazy lambdas, so only the requested strategy runs.
    """
    handlers = {
        "median": lambda d: d.fillna(d.median()),
        # df.mode() returns a frame (a column can have several modes);
        # the first row holds the most frequent value per column.
        "mode": lambda d: d.fillna(d.mode().iloc[0]),
        "mean": lambda d: d.fillna(d.mean()),
    }
    handler = handlers.get(mode)
    return handler(df) if handler is not None else None
def upsample_data(df, target_column_name, minority_class, majority_class, number_of_samples, random_state=None):
    """Upsample the minority class to *number_of_samples* rows (sampling with
    replacement) and return it concatenated with the majority-class rows.

    Note: only the two named classes appear in the result; rows of any other
    class are dropped.  `random_state` (new, default None = original
    behaviour) makes the resampling reproducible.
    """
    majority = df[df[target_column_name] == majority_class]
    minority = df[df[target_column_name] == minority_class]
    minority_upsampled = resample(minority, replace=True,
                                  n_samples=number_of_samples,
                                  random_state=random_state)
    return pd.concat([minority_upsampled, majority])
def train_model(algorithm_name, model, X_train, X_test, y_train, y_test):
    """Fit *model* on the training split, then print its evaluation summary
    (confusion matrix, classification report, cross-validation stats)."""
    model.fit(X_train, y_train)
    print("#############Evaluating the Performance of ", algorithm_name, "#############")
    evaluate_performance(model, X_train, y_train, X_test, y_test)
    print("#############Evaluation Completed for ", algorithm_name, " ###################")
def evaluate_performance(model, X_train, y_train, X_test, y_test):
    """Print the confusion matrix, classification report and 5-fold
    cross-validation accuracy (mean and std, in percent) of a fitted model."""
    predictions = model.predict(X_test)
    print("\n")
    print("Confusion Metrics:")
    print(metrics.confusion_matrix(y_test, predictions))
    print("\n")
    print("Model Performance Summary:")
    print(metrics.classification_report(y_test, predictions))
    print("Evaluating the Cross Validation score")
    scores = cross_val_score(estimator=model, X=X_train, y=y_train, cv=5)
    print("Mean Accuracy:", scores.mean() * 100)
    print("Standard Deviation:", scores.std() * 100)
def plot_auc_roc_curve(y_test, y_pred):
    """Plot the ROC curve for binary predictions, with the AUC in the legend
    and the chance diagonal for reference."""
    fpr, tpr, _thresholds = roc_curve(y_test, y_pred)
    area = auc(fpr, tpr)
    plt.title('Receiver Operating Characteristic')
    plt.plot(fpr, tpr, 'b', label='AUC = %0.2f' % area)
    plt.legend(loc='lower right')
    # Chance diagonal; axes padded slightly beyond [0, 1].
    plt.plot([0, 1], [0, 1], 'r--')
    plt.xlim([-0.1, 1.2])
    plt.ylim([-0.1, 1.2])
    plt.ylabel('True Positive Rate')
    plt.xlabel('False Positive Rate')
    plt.show()
def evaluate_bestK(X_train, y_train, X_test, y_test):
    """Scan K = 1..49 for KNN, plot the test-error curve, and return the K
    with the lowest error (first one on ties)."""
    errors = []
    for k in range(1, 50):
        classifier = KNeighborsClassifier(n_neighbors=k)
        classifier.fit(X_train, y_train)
        errors.append(np.mean(classifier.predict(X_test) != y_test))
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, 50), errors, color="green", linestyle="dashed",
             marker="o", markerfacecolor='red', markersize=10)
    plt.title("Error Rate vs K")
    plt.xlabel('K')
    plt.ylabel('Error Rate')
    plt.show()
    # +1 because the list is 0-indexed while K starts at 1.
    return errors.index(min(errors)) + 1
def random_forest_hyperparams():
    """Build the RandomForest search space.

    Returns (n_estimators, max_features, max_depth, min_samples_split,
    min_samples_leaf, bootstrap) as plain lists.
    """
    # 5 evenly spaced tree counts between 50 and 200, truncated to ints.
    n_estimators = [int(v) for v in np.linspace(50, 200, num=5)]
    max_features = ['auto', 'sqrt']
    # Depths 10..20 plus None (grow until pure).
    max_depth = [int(v) for v in np.linspace(10, 20, num=5)] + [None]
    min_samples_split = [2, 5, 10, 15]
    min_samples_leaf = [1, 2, 4, 10]
    bootstrap = [True, False]
    return (n_estimators, max_features, max_depth,
            min_samples_split, min_samples_leaf, bootstrap)
def adaboost_hyperparams():
    """Build the AdaBoost search space; returns (n_estimators, learning_rate)."""
    estimator_counts = [int(v) for v in np.linspace(50, 200, num=5)]
    learning_rates = [1, 0.1, 0.01, 0.001, 0.5, 0.05, 0.005]
    return estimator_counts, learning_rates
def get_model_turning_params(modelname):
    """Return the hyperparameter search space for *modelname*
    ('random_forest' or 'ada_boost'); None for any other name."""
    if modelname == "random_forest":
        return random_forest_hyperparams()
    if modelname == "ada_boost":
        return adaboost_hyperparams()
    return None
def initalize_tuning_params():
    """Populate the global `hyperparams` object with the SVM, random-forest
    and AdaBoost search grids.

    Fixes over the original:
    * the SVM 'C' grid listed 1 twice; the duplicate is replaced by 0.1
      (the value the final summary reports as the tuned C).
    * the AdaBoost grid carried a leftover 'max_features' entry from the
      random-forest grid; AdaBoostClassifier has no such parameter and a
      search over it would raise.
    """
    tuning_params_svm = {'C': [0.01, 0.05, 0.1, 1],
                         'gamma': [1, 0.1, 0.01, 0.002, 0.0005, 0.001, 0.0001],
                         'kernel': ['rbf', 'linear']}
    (n_estimators, max_features, max_depth,
     min_samples_split, min_samples_leaf, bootstrap) = get_model_turning_params("random_forest")
    tuning_params_randomforest = {'n_estimators': n_estimators,
                                  'max_features': max_features,
                                  'max_depth': max_depth,
                                  'min_samples_split': min_samples_split,
                                  'min_samples_leaf': min_samples_leaf,
                                  'bootstrap': bootstrap}
    n_estimators, learning_rate = get_model_turning_params("ada_boost")
    tuning_params_adaboost = {'n_estimators': n_estimators,
                              'learning_rate': learning_rate}
    hyperparams.random_forest = tuning_params_randomforest
    hyperparams.adaboost = tuning_params_adaboost
    hyperparams.svm = tuning_params_svm
def evaluate_logisticregression_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a default LogisticRegression model."""
    train_model("Logistic Regression", LogisticRegression(), X_train, X_test, y_train, y_test)
def evaluate_naivebayes_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a Gaussian Naive Bayes classifier."""
    train_model("Naive Bayes", GaussianNB(), X_train, X_test, y_train, y_test)
def evaluate_knn_performance(X_train, y_train, X_test, y_test):
    """Pick the best K via evaluate_bestK, then train and evaluate a
    distance-weighted KNN with that K."""
    best_k = evaluate_bestK(X_train, y_train, X_test, y_test)
    print("Optimal value of K is: ", best_k)
    train_model("KNN",
                KNeighborsClassifier(n_neighbors=best_k, weights="distance"),
                X_train, X_test, y_train, y_test)
def do_hyperparams_tuning_svm(X_train, y_train, X_test, y_test):
    """Grid-search SVC hyperparameters (C, gamma, kernel) on the training
    split and return the refitted best estimator.

    Bug fix: the original C grid listed 1 twice; the duplicate is replaced
    by 0.1 (the value the final summary reports as the tuned C).
    """
    param_grid = {'C': [0.01, 0.05, 0.1, 1],
                  'gamma': [1, 0.1, 0.01, 0.002],
                  'kernel': ['rbf', 'linear']}
    model = GridSearchCV(SVC(), param_grid, refit=True, verbose=3)
    model.fit(X_train, y_train)
    return model.best_estimator_
def evaluate_svm_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a default (RBF) SVC."""
    train_model("SVM", SVC(), X_train, X_test, y_train, y_test)
def evaluate_adaboost_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate AdaBoost over decision trees (3 estimators,
    learning rate 0.001)."""
    booster = AdaBoostClassifier(DecisionTreeClassifier(),
                                 n_estimators=3, learning_rate=0.001)
    train_model("AdaBoost", booster, X_train, X_test, y_train, y_test)
def evaluate_gradientboost_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate gradient boosting (learning rate 0.01, fixed seed)."""
    booster = GradientBoostingClassifier(learning_rate=0.01, random_state=1)
    train_model("GradientBoost", booster, X_train, X_test, y_train, y_test)
def evaluate_metabagging_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a bagging ensemble of decision trees."""
    bagger = BaggingClassifier(DecisionTreeClassifier(random_state=1))
    train_model("MetaBagging", bagger, X_train, X_test, y_train, y_test)
def evaluate_randomforest_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a 10-tree entropy-criterion random forest."""
    forest = RandomForestClassifier(n_estimators=10, criterion='entropy')
    train_model("RandomForest", forest, X_train, X_test, y_train, y_test)
def do_hyperparms_tuning_rf(X_train, y_train, X_test, y_test):
    """Randomized search (100 candidates, 3-fold CV) over the random-forest
    grid stored in the global `hyperparams`; returns the best estimator."""
    search = RandomizedSearchCV(estimator=RandomForestClassifier(),
                                param_distributions=hyperparams.random_forest,
                                n_iter=100, cv=3, verbose=2,
                                random_state=42, n_jobs=-1)
    search.fit(X_train, y_train)
    return search.best_estimator_
def evaluate_xgboost_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a default XGBoost classifier."""
    train_model("XGBoost", XGBClassifier(), X_train, X_test, y_train, y_test)
def evaluate_catboost_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate CatBoost (100 iterations, depth 3, lr 0.1).

    NOTE(review): loss_function='Logloss' is the binary objective; this
    dataset has three classes — confirm 'MultiClass' is not required here.
    """
    booster = CatBoostClassifier(iterations=100, depth=3,
                                 learning_rate=0.1, loss_function='Logloss')
    train_model("Catboost", booster, X_train, X_test, y_train, y_test)
def evaluate_extratreeclassifier_performance(X_train, y_train, X_test, y_test):
    """Train and evaluate a 700-tree ExtraTrees ensemble (entropy criterion)."""
    forest = ExtraTreesClassifier(n_estimators=700, criterion='entropy',
                                  min_samples_split=5, max_depth=50,
                                  min_samples_leaf=5)
    train_model("Extra Tree Classifier", forest, X_train, X_test, y_train, y_test)
def show_feature_importance(X, model, threshold):
    """Print each feature of *X* next to the fitted model's importance score.

    NOTE(review): `threshold` is accepted but currently unused.
    """
    importance = pd.DataFrame()
    importance['feature'] = X.columns
    importance['Importance Index'] = model.feature_importances_
    print(importance)
#def evaluate_kmeans_clustering(data):
#kmeans = KMeans(n_clusters = 3, init = 'k-means++', random_state = 42, max_iter=300,n_init=3) ## n_init number of centroids initialized
#y_pred = kmeans.fit_predict(data)
#data['Cluster'] = data['class'].apply(lambda r: 0 if r["class"] == "van" else 1 if r["class"] == "car" else 2,axis=1)
#print(confusion_matrix(df['Cluster'],y_kmeans))
#print(classification_report(df['Cluster'],y_kmeans))
def do_pca(df_scaled):
    """Manual PCA over a standardised feature matrix (rows = samples).

    Prints the eigen decomposition and the cumulative explained variance,
    and plots the scree chart.  The sorted eigenvalues/eigenvectors are both
    RETURNED and published as module globals (`eigvalues_sorted`,
    `eigvectors_sorted`) because later script cells read them as globals —
    as originally written they were local to this function and those cells
    raised NameError.

    Other fixes: the "EigenVectors" print actually printed the eigenVALUES;
    the scree-plot x range was hard-coded to 18 components and is now
    derived from the data; sorting uses the eigenvalue as an explicit key so
    ties cannot fall through to ambiguous ndarray comparison.
    """
    global eigvalues_sorted, eigvectors_sorted
    cov_matrix = np.cov(df_scaled.T)
    eigenvalues, eigenvectors = np.linalg.eig(cov_matrix)
    print("EigenValues Size: ", eigenvalues.size)
    print("EigenVectors: \n", eigenvectors)
    # Pair each eigenvalue with its column eigenvector, largest first.
    eig_pairs = [(eigenvalues[i], eigenvectors[:, i]) for i in range(len(eigenvalues))]
    eig_pairs.sort(key=lambda pair: pair[0], reverse=True)
    eigvalues_sorted = [pair[0] for pair in eig_pairs]
    eigvectors_sorted = [pair[1] for pair in eig_pairs]
    total = sum(eigenvalues)
    var_explained = [value / total for value in eigvalues_sorted]
    cum_var_exp = np.cumsum(var_explained)
    print("Cumulative Variance: \n", cum_var_exp)
    n_components = len(eigenvalues)
    plt.bar(range(1, n_components + 1), var_explained, alpha=0.5,
            align='center', label='individual explained variance')
    plt.step(range(1, n_components + 1), cum_var_exp, where='mid',
             label='cumulative explained variance')
    plt.ylabel('Explained variance ratio')
    plt.xlabel('Principal components')
    plt.legend(loc='best')
    plt.show()
    return eigvalues_sorted, eigvectors_sorted
# Reload the raw dataset for the modelling phase.
vehicles_df = pd.read_csv("vehicle.csv")
vehicles_df.head()
There are no categorical features in the data other than the class column.
# Split features/target (the original's chained "X, y = X,y = ..." was a
# redundant double assignment).
X, y = get_features_and_target(vehicles_df, "class")
X.shape
y.shape
X.isnull().sum()
# Median-impute missing feature values, then keep the imputed rows aside:
# the train/test split is done over fully-observed rows only, and the
# imputed rows are added back to the TRAINING set afterwards, so the test
# set never contains imputed data.
vehicles_df_clean = vehicles_df.fillna(X.median())
vehicles_df_filled = vehicles_df_clean[(vehicles_df.isnull().any(axis=1))]
vehicles_df_original = vehicles_df.dropna()
vehicles_df_original.shape
vehicles_df_filled.shape
X_original, y_original = get_features_and_target(vehicles_df_original, "class")
X_filled, y_filled = get_features_and_target(vehicles_df_filled, "class")
X_train, X_test, y_train, y_test = train_test_split(X_original, y_original,
                                                    test_size=0.34, random_state=0)
X_train = pd.concat([X_train, X_filled])
y_train = pd.concat([y_train, y_filled])
print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)
y_train.value_counts()
y_test.value_counts()
initalize_tuning_params()
# Baseline models on the raw feature space, in the same order as before.
for evaluate in (evaluate_logisticregression_performance,
                 evaluate_naivebayes_performance,
                 evaluate_knn_performance,
                 evaluate_svm_performance):
    evaluate(X_train, y_train, X_test, y_test)
model = do_hyperparams_tuning_svm(X_train, y_train, X_test, y_test)
train_model("SVM (with hyperparam tuning)", model, X_train, X_test, y_train, y_test)
for evaluate in (evaluate_adaboost_performance,
                 evaluate_gradientboost_performance,
                 evaluate_metabagging_performance,
                 evaluate_randomforest_performance):
    evaluate(X_train, y_train, X_test, y_test)
model = do_hyperparms_tuning_rf(X_train, y_train, X_test, y_test)
train_model("RandomForest (With hyperparam tuning)", model, X_train, X_test, y_train, y_test)
# Standardise the training features and run PCA on them.
sc = StandardScaler()
df_scaled = sc.fit_transform(X_train)
do_pca(df_scaled)
# NOTE(review): relies on eigvectors_sorted being visible after do_pca runs;
# as do_pca was originally written it kept that list local, so this line
# raised NameError — confirm do_pca publishes/returns it.
P_reduce = np.array(eigvectors_sorted[0:8])
X_train_reduced = pd.DataFrame(np.dot(df_scaled, P_reduce.T))
X_train_reduced.head()
X_train_reduced.shape
sns.pairplot(X_train_reduced)
# Bug fix: the test split must be transformed with the scaler FITTED ON THE
# TRAINING DATA; the original fit a fresh StandardScaler on X_test, leaking
# test-set statistics into the preprocessing.
df_scaled_test = sc.transform(X_test)
X_test_reduced = pd.DataFrame(np.dot(df_scaled_test, P_reduce.T))
X_test_reduced.head()
X_test_reduced.shape
# Re-run the baseline models on the 8-component PCA projection.
evaluate_logisticregression_performance(X_train_reduced,y_train,X_test_reduced,y_test)
evaluate_naivebayes_performance(X_train_reduced,y_train,X_test_reduced,y_test)
evaluate_knn_performance(X_train_reduced,y_train,X_test_reduced,y_test)
evaluate_svm_performance(X_train_reduced,y_train,X_test_reduced,y_test)
model=do_hyperparams_tuning_svm(X_train_reduced,y_train,X_test_reduced,y_test)
train_model("SVM (with hyperparam tuning)", model, X_train_reduced, X_test_reduced, y_train, y_test)
evaluate_gradientboost_performance(X_train_reduced,y_train,X_test_reduced,y_test)
# Encode the target for clustering: van -> 0, car -> 1, anything else (bus) -> 2.
vehicles_df["class"] = vehicles_df.apply(lambda r: 0 if r["class"] == "van" else 1 if r["class"] == "car" else 2,axis=1)
# Impute remaining NaNs with the feature medians computed earlier (X excludes 'class').
vehicles_df = vehicles_df.fillna(X.median())
vehicles_df.head()
def evaluate_kmeans_clustering(data):
    """Cluster the feature columns with KMeans (k=3, fixed seed) and report
    agreement with the 'class' labels.

    KMeans cluster ids are arbitrary, so each cluster is first mapped to the
    majority true class among its members before computing the confusion
    matrix / classification report — the original compared raw cluster ids
    to class labels, which scrambles the metrics whenever the ids do not
    happen to line up with the label encoding.
    """
    kmeans = KMeans(n_clusters=3, init='k-means++', random_state=42, max_iter=300)
    cluster_ids = kmeans.fit_predict(data.drop("class", axis=1))
    print(cluster_ids)
    # Positional labels so the boolean numpy masks below align row-for-row.
    labels = data['class'].reset_index(drop=True)
    cluster_to_class = {c: labels[cluster_ids == c].mode().iloc[0]
                        for c in np.unique(cluster_ids)}
    y_pred = pd.Series(cluster_ids).map(cluster_to_class)
    print(metrics.confusion_matrix(labels, y_pred))
    print(metrics.classification_report(labels, y_pred))
evaluate_kmeans_clustering(vehicles_df)
# PCA on the full (label-encoded) dataset, then cluster in the reduced space.
sc= StandardScaler()
df_scaled = sc.fit_transform(vehicles_df.drop("class",axis=1))
do_pca(df_scaled)
# NOTE(review): relies on eigvectors_sorted being visible after do_pca runs;
# as do_pca was originally written it kept that list local, so this raises
# NameError — confirm.
P_reduce = np.array(eigvectors_sorted[0:8])
X_reduced = np.dot(df_scaled,P_reduce.T)
X_reduced = pd.DataFrame(X_reduced)
sns.pairplot(X_reduced)
# y still holds the original string labels; the concat aligns because both
# frames carry the default RangeIndex.
df=pd.concat([X_reduced,y],axis=1)
df.head()
df['class'] = df.apply((lambda r: 0 if r["class"] == "van" else 1 if r["class"] == "car" else 2),axis=1)
evaluate_kmeans_clustering(df)
SVM performed very well with this dataset.
The following activities are done to make the model performance better:
1. Tuned hyperparameters (C = 0.1, kernel= rbf)
2. Model is trained and tested over Principal Components(top 8)
3. Model property:
F1 Score :
bus 0.98
car 0.95
van 0.90
Standard Deviation: 1.3
The model predicts the class with 98% precision